#! /usr/bin/env R
#
# Code to estimate the PCs
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
message("Log file for code executed at\n")
message(format(Sys.time(), "%a %b %d %X %Y"))
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
library(magrittr)
library(glue);
library(lubridate)
library(stringr)
library(haven);
library(RcppRoll)
library(data.table);
library(statar)
library(zoo)
library(ggplot2)

library(tidyr)
library(dplyr)
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# --- 1. Import and add ISO Country names to currencies:
dt_fx = fread("../task_data/output/FX_daily_long.csv")

dt_fx = dt_fx[, .(date= ymd(date), ticker, base, foreign, prc=as.numeric(value))]

# remove Sri Lanka:
dt_ccodes <- read_dta("./input/country-codes.dta") %>% data.table

dt_fx <- merge(dt_fx, dt_ccodes[, .(base=ISO4217, base_country=ISO3166)], by = c("base"), all.x=T)
dt_fx <- merge(dt_fx, dt_ccodes[, .(foreign=ISO4217, foreign_country=ISO3166)], by = c("foreign"), all.x=T)
dt_fx[base=="TWD",base_country:="TWN"]
dt_fx[foreign=="TWD",foreign_country:="TWN"]
dt_fx[base=="HKD",base_country:="HKG"]
dt_fx[foreign=="HKD",foreign_country:="HKG"]
dt_fx[base=="EUR",base_country:="EUR"]
dt_fx[foreign=="EUR",foreign_country:="EUR"]

#drop weekends on some tickers
dt_fx <- dt_fx[ wday(date) %in% c(2,3,4,5,6) ,] 
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# --- 2. Constructing Base Factors (as in Lustig/Richmond(2018)), but here
setorder(dt_fx, ticker, date)
dt_fx[, datey := year(date) ]
dt_fx[, log_prc := log(prc)]
dt_fx[, d1_prc  := log_prc - tlag(log_prc, n=1L, time=date), by = .(ticker) ]

dt_fx[, range_euro := 0 ]
for (c_euro in c("ATS", "BEF", "FIM", "FRF", "DEM", "GRD", 
                 "IEP", "ITL", "NLG", "PTE", "ESP")){
    dt_fx[ base == c_euro | foreign == c_euro, range_euro := 1 ]
}

# Fix pre euro currencies
dt_fx <- dt_fx[ datey > 1999 & range_euro == 1 , prc := NA ]
dt_fx <- dt_fx[ datey > 1999 & range_euro == 1 , log_prc := NA ]
dt_fx <- dt_fx[ datey > 1999 & range_euro == 1 , d1_prc := NA ]

# Estimate the base factor (and foreign)
dt_fx[!is.finite(d1_prc), d1_prc := NA ]
dt_fx[, d1_base_factor := mean(d1_prc, na.rm=T), by = .(base, date) ]  # calculated for all countries
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# --- 3. Transform the data to wide to run the PCA:
dt_panel <- dt_fx[, c("date", "base", "d1_base_factor")]

dt_panel = dt_panel[ is.nan(d1_base_factor), d1_base_factor := NA ]
dt_panel_wide = dcast(unique(dt_panel), date ~ base, 
    value.var="d1_base_factor")

dt_panel_wide[, datey := year(date)]
setcolorder(dt_panel_wide, c("date", "datey"))
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
## PCA loop:
dt_pca_projection = data.table()
dt_PCs_archive = data.table(); dt_SDev_archive = data.table()
N_PCA = 6

for (date_iter in 1971:2019) {
  # date_iter = 1971
  message("Processing of year ... ", date_iter)
  
  dt_tmp <- dt_panel_wide[datey == date_iter]
  
  ## selecting only non-missing currencies with more than 100 observations to do the analysis:
  l_names <- colnames(dt_tmp[, -c("date", "datey")])  
  keep_names = c()
  for (name_iter in l_names){
    # name_iter = "USD"
    dt_aux <- dt_tmp[, .(tmp = get(name_iter)) ]
    # check if currency has more than 100 observations. keep only if TRUE
    if ( nrow(dt_aux[!is.na(tmp)]) >= 100 ){
      keep_names = cbind(keep_names, name_iter)
    }
  }
  
  dt_pca_tmp = na.omit(dt_tmp[, c("date", "datey", keep_names), with=F])
  # FLAG DEAL WITH MISSING
  pca <- prcomp( dt_pca_tmp[, -c("date", "datey")], center = TRUE, scale = TRUE) 
  ## Getting the PCA result for every year if you want to check:
  assign(paste0("pca_", date_iter), pca)  
  ## Projecting the data in the first 4 components:
  
  l_names <- colnames(dt_pca_tmp[, -c("date", "datey")])  
  dt_pca_reg_tmp =  cbind(dt_pca_tmp, pca[["x"]][,1:N_PCA]) 
  
  dt_PCs_archive = rbind(dt_PCs_archive,
                         data.table(date=dt_pca_tmp[["date"]], pca[["x"]]), fill=TRUE ) 
  dt_SDev_archive = rbind(dt_SDev_archive,
                          cbind(datey = date_iter, data.table(t(pca$sdev)) ), fill = TRUE)
  
  # message(paste(l_names, collapse=", "))
  for (j in l_names) {
    # j = "AED" # debug
    
    # doing regression
    proj <- lm( get(j) ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6, data = dt_pca_reg_tmp)
    currency <- substr( j, nchar(j)-2, nchar(j) )
    # doing it PC way
    proj_alt = pca$rotation[ c(j), 1:N_PCA] * sd(dt_tmp[[j]], na.rm=T)
    resid_alt = pca$rotation[ c(j), N_PCA:(ncol(pca$rotation))] * sd(dt_tmp[[j]], na.rm=T)
    proj_fit = rowSums(proj_alt * pca[["x"]][, 1:N_PCA] )
    resid_fit = rowSums(resid_alt * pca[["x"]][, 1:N_PCA] )
    
    dt_aux <- dt_pca_tmp[, .(date)] 
    dt_aux[, `:=`(fitted = proj$fitted.values, 
                  fitted_alt = proj_fit ) ]
    dt_aux[, `:=`(resid = proj$residuals, 
                  resid_alt = resid_fit) ]
    dt_aux[, year := date_iter]
    dt_aux[, base := currency]
    dt_pca_projection <- rbind(dt_pca_projection, dt_aux)
  }
  
  
}

dt_pca_projection
dt_PCs_archive
dt_SDev_archive
# ---------------------------------------------------------------------


# ---------------------------------------------------------------------
# SAVE THE PCS
setcolorder(dt_pca_projection, c("year", "date", "base"))
fwrite(dt_pca_projection,"./output/base_PCA.csv")

dt_PCs_archive <- merge(dt_PCs_archive, dt_panel_wide[, c("date", "USD")], by = "date")

fwrite(dt_PCs_archive, "./output/PCs_date.csv")
# ---------------------------------------------------------------------




















